import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
import seaborn as sns
# Read allFiles.csv into a DataFrame, using the first CSV column as the row
# index, then preview the first rows.
# NOTE(review): the index labels appear to restart (info() reports 4,141,622
# entries labelled 0 to 14999), so they are probably not unique row
# identifiers -- confirm before relying on label-based lookups.
df = pd.read_csv("allFiles.csv", index_col=0)
df.head()
| | fx | fy | fz | mx | my | mz | Trial Number | Trial Type | Subject Number | Body Mass (Kg) | AP Sway (cm) | ML Sway (cm) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -6.47778 | -11.10205 | 778.78125 | -0.38182 | -16.04395 | -22.75000 | 1 | 1 | Subject 001 | 79.2834 | 0.049028 | -2.060136 |
| 1 | -6.91699 | -10.65332 | 777.71875 | 0.03605 | -15.95557 | -22.76465 | 1 | 1 | Subject 001 | 79.2834 | -0.004635 | -2.051586 |
| 2 | -6.55713 | -11.10156 | 778.78125 | -0.20785 | -15.94238 | -22.77148 | 1 | 1 | Subject 001 | 79.2834 | 0.026689 | -2.047093 |
| 3 | -6.64404 | -10.92285 | 778.40625 | -0.30165 | -15.87402 | -22.70313 | 1 | 1 | Subject 001 | 79.2834 | 0.038752 | -2.039298 |
| 4 | -6.74341 | -10.64600 | 779.84375 | -0.09583 | -15.91504 | -22.78906 | 1 | 1 | Subject 001 | 79.2834 | 0.012288 | -2.040799 |
df.tail()
| | fx | fy | fz | mx | my | mz | Trial Number | Trial Type | Subject Number | Body Mass (Kg) | AP Sway (cm) | ML Sway (cm) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 14995 | 3.85071 | -7.57349 | 626.18750 | -13.28613 | 30.21777 | -3.29358 | 36 | 6 | Subject 008 | 61.623817 | 2.121749 | 4.825674 |
| 14996 | 3.67505 | -7.21484 | 625.12500 | -13.25586 | 30.31641 | -3.33411 | 36 | 6 | Subject 008 | 61.623817 | 2.120513 | 4.849656 |
| 14997 | 3.41003 | -6.76709 | 625.84375 | -13.22949 | 30.23730 | -3.40369 | 36 | 6 | Subject 008 | 61.623817 | 2.113865 | 4.831446 |
| 14998 | 3.05103 | -7.47876 | 626.53125 | -13.20020 | 30.25684 | -3.31079 | 36 | 6 | Subject 008 | 61.623817 | 2.106870 | 4.829263 |
| 14999 | 3.31970 | -7.12305 | 625.50000 | -13.30273 | 30.30273 | -3.27930 | 36 | 6 | Subject 008 | 61.623817 | 2.126735 | 4.844561 |
df.sample(5)
| | fx | fy | fz | mx | my | mz | Trial Number | Trial Type | Subject Number | Body Mass (Kg) | AP Sway (cm) | ML Sway (cm) |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 13347 | -7.83765 | -10.24561 | 791.71875 | 6.52124 | -0.81161 | -19.33398 | 19 | 1 | Subject 001 | 79.283400 | -0.823681 | -0.102512 |
| 4899 | -2.62964 | -4.45581 | 786.15625 | -3.81213 | 31.75586 | -8.51221 | 16 | 4 | Subject 003 | 72.505355 | 0.484907 | 4.039383 |
| 13386 | 0.14735 | -7.71460 | 571.87500 | 9.97510 | -13.05713 | -9.95898 | 7 | 1 | Subject 007 | 56.076363 | -1.744280 | -2.283214 |
| 2186 | -7.97803 | -10.50342 | 796.00000 | -2.60474 | 4.69678 | -19.92090 | 23 | 5 | Subject 001 | 79.283400 | 0.327229 | 0.590048 |
| 5352 | -2.58728 | -6.48511 | 533.53125 | 30.89355 | -4.84692 | -9.59717 | 8 | 2 | Subject 006 | 52.497061 | -5.790392 | -0.908460 |
df.shape
(4141622, 12)
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 4141622 entries, 0 to 14999 Data columns (total 12 columns): # Column Dtype --- ------ ----- 0 fx float64 1 fy float64 2 fz float64 3 mx float64 4 my float64 5 mz float64 6 Trial Number int64 7 Trial Type int64 8 Subject Number object 9 Body Mass (Kg) float64 10 AP Sway (cm) float64 11 ML Sway (cm) float64 dtypes: float64(9), int64(2), object(1) memory usage: 410.8+ MB
# Count the missing values in each column (isna is the canonical spelling of
# the isnull alias and returns the same result).
df.isna().sum()
fx 0 fy 0 fz 0 mx 0 my 0 mz 0 Trial Number 0 Trial Type 0 Subject Number 0 Body Mass (Kg) 0 AP Sway (cm) 0 ML Sway (cm) 0 dtype: int64
df.describe()
| | fx | fy | fz | mx | my | mz | Trial Number | Trial Type | Body Mass (Kg) | AP Sway (cm) | ML Sway (cm) |
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 4.141622e+06 | 4.141622e+06 | 4.141622e+06 | 4.141622e+06 | 4.141622e+06 | 4.141622e+06 | 4.141622e+06 | 4.141622e+06 | 4.141622e+06 | 4.141622e+06 | 4.141622e+06 |
| mean | -1.618832e+00 | -6.067932e+00 | 6.225338e+02 | 1.045832e+00 | 1.982484e-01 | -9.915073e+00 | 1.856841e+01 | 3.497050e+00 | 6.066514e+01 | -2.527847e-01 | -1.130701e-02 |
| std | 6.444667e+00 | 5.163432e+00 | 1.048473e+02 | 1.459436e+01 | 1.016796e+01 | 4.304973e+00 | 1.043083e+01 | 1.710644e+00 | 1.004990e+01 | 2.333414e+00 | 1.663172e+00 |
| min | -1.455781e+02 | -9.714844e+01 | 2.827150e+00 | -8.621094e+01 | -1.003633e+02 | -4.004688e+01 | 1.000000e+00 | 1.000000e+00 | 4.815877e+01 | -1.178476e+01 | -3.807063e+01 |
| 25% | -3.529790e+00 | -9.593750e+00 | 5.490625e+02 | -6.791260e+00 | -6.246768e+00 | -1.054639e+01 | 1.000000e+01 | 2.000000e+00 | 5.249706e+01 | -1.759027e+00 | -1.050991e+00 |
| 50% | -1.272030e+00 | -5.771730e+00 | 5.980000e+02 | 2.470210e+00 | 3.567300e-01 | -9.020510e+00 | 1.900000e+01 | 3.000000e+00 | 6.162382e+01 | -4.092785e-01 | 5.898343e-02 |
| 75% | 1.171750e+00 | -2.755620e+00 | 6.537188e+02 | 1.052832e+01 | 6.556400e+00 | -7.652100e+00 | 2.800000e+01 | 5.000000e+00 | 6.281340e+01 | 1.095831e+00 | 1.056186e+00 |
| max | 1.975547e+02 | 8.324219e+01 | 9.651562e+02 | 6.685938e+01 | 6.570703e+01 | 4.880620e+00 | 3.700000e+01 | 6.000000e+00 | 7.928340e+01 | 3.445227e+01 | 2.348549e+01 |
df['Trial Type'].unique()
array([1, 2, 3, 4, 5, 6])
df['Trial Number'].unique()
array([ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34,
35, 36, 37])
df['AP Sway (cm)'].describe()
count 4.141622e+06 mean -2.527847e-01 std 2.333414e+00 min -1.178476e+01 25% -1.759027e+00 50% -4.092785e-01 75% 1.095831e+00 max 3.445227e+01 Name: AP Sway (cm), dtype: float64
df['ML Sway (cm)'].describe()
count 4.141622e+06 mean -1.130701e-02 std 1.663172e+00 min -3.807063e+01 25% -1.050991e+00 50% 5.898343e-02 75% 1.056186e+00 max 2.348549e+01 Name: ML Sway (cm), dtype: float64
# Rank every numeric column by the strength of its Pearson correlation with
# AP sway.  The absolute value is taken BEFORE sorting so that strong negative
# correlations rank at the top instead of the bottom of the list.
# Non-numeric columns are dropped explicitly because newer pandas releases no
# longer discard them silently inside corr().
(
    df.select_dtypes(include="number")
    .corr()["AP Sway (cm)"]
    .abs()
    .sort_values(ascending=False)
)
AP Sway (cm) 1.000000 Trial Type 0.359294 Body Mass (Kg) 0.267757 fz 0.215750 mz 0.134368 my 0.024728 Trial Number 0.002883 ML Sway (cm) 0.000972 fx 0.007695 fy 0.157106 mx 0.966911 Name: AP Sway (cm), dtype: float64
# Rank every numeric column by the strength of its Pearson correlation with
# ML sway.  The absolute value is taken BEFORE sorting so that strong negative
# correlations rank at the top instead of the bottom of the list.
# Non-numeric columns are dropped explicitly because newer pandas releases no
# longer discard them silently inside corr().
(
    df.select_dtypes(include="number")
    .corr()["ML Sway (cm)"]
    .abs()
    .sort_values(ascending=False)
)
ML Sway (cm) 1.000000 my 0.944264 mz 0.294819 fz 0.154054 Body Mass (Kg) 0.108027 Trial Number 0.033759 AP Sway (cm) 0.000972 Trial Type 0.005355 mx 0.021619 fy 0.029860 fx 0.116864 Name: ML Sway (cm), dtype: float64
# Turn labels such as "Subject 001" into the integer 1 so the subject can take
# part in the correlation matrix.  The conversion used to be commented out,
# which left the column as text, so the correlation lookup below could not
# succeed.  It is restored here and guarded so that re-running the cell after
# the column is already numeric is harmless.  Extracting the digits (rather
# than stripping the literal prefix "Subject 00") also copes with subject
# numbers of 10 and above.
# NOTE(review): the subject number is a nominal label, so a Pearson
# coefficient against it depends on the arbitrary numbering -- read with care.
if not pd.api.types.is_numeric_dtype(df['Subject Number']):
    df['Subject Number'] = (
        df['Subject Number'].str.extract(r'(\d+)', expand=False).astype(int)
    )

# Rank columns by |r| with the subject number; abs() comes before the sort so
# strong negative correlations are not pushed to the bottom.
(
    df.select_dtypes(include="number")
    .corr()["Subject Number"]
    .abs()
    .sort_values(ascending=False)
)
Subject Number 1.000000 mz 0.649267 fx 0.175830 ML Sway (cm) 0.070002 my 0.068177 fy 0.064284 mx 0.040048 Trial Type 0.004206 Trial Number 0.006049 AP Sway (cm) 0.036516 Body Mass (Kg) 0.488195 fz 0.526974 Name: Subject Number, dtype: float64
# Rank every numeric column by the strength of its Pearson correlation with
# the trial type.  The absolute value is taken BEFORE sorting so that strong
# negative correlations rank at the top instead of the bottom of the list.
# Non-numeric columns are dropped explicitly because newer pandas releases no
# longer discard them silently inside corr().
(
    df.select_dtypes(include="number")
    .corr()["Trial Type"]
    .abs()
    .sort_values(ascending=False)
)
Trial Type 1.000000 AP Sway (cm) 0.359294 Trial Number 0.154506 mz 0.060668 Body Mass (Kg) 0.005416 Subject Number 0.004206 my 0.000423 fz 0.000102 ML Sway (cm) 0.005355 fx 0.038929 fy 0.044938 mx 0.348358 Name: Trial Type, dtype: float64
# Rank every numeric column by the strength of its Pearson correlation with
# body mass.  The absolute value is taken BEFORE sorting so that strong
# negative correlations rank at the top instead of the bottom of the list.
# Non-numeric columns are dropped explicitly because newer pandas releases no
# longer discard them silently inside corr().
(
    df.select_dtypes(include="number")
    .corr()["Body Mass (Kg)"]
    .abs()
    .sort_values(ascending=False)
)
Body Mass (Kg) 1.000000 fz 0.959601 AP Sway (cm) 0.267757 ML Sway (cm) 0.108027 my 0.101717 Trial Type 0.005416 Trial Number 0.004147 fx 0.215465 fy 0.235221 mx 0.254419 Subject Number 0.488195 mz 0.638235 Name: Body Mass (Kg), dtype: float64
!pip3 install pandas_profiling --upgrade
import pandas_profiling
df.profile_report()